**# DOCUMENTATION
/*
 Part 1: Collect financials between 2000-2013. Then for each firm, it fills the years gaps since incorporation year.
 Part 2: Create the Aggregate outputs using the outcome from Part 1.					 
 Part 3: Clean folder and delete temporal files.			 
*/

**# Setting and Configurations
cls 
clear
set more off
global path_output "/Reporting/"
global path_input "/ORBIS/Financials/"
global start_year = 2000
 
**# PART 1: Collect unconsolidated financials between 2000-2013 from orbis files AT_2000.dta, At_2001.dta, etc.
cd "${path_input}"	 

local country AT BE DE DK ES FI FR GB IE IT NL SE GB IE IT NL SE
foreach c of local country {
	forvalues i = ${start_year}(1)2014{
		append using `c'_`i'.dta, keep(bvid name_internat  country  closdate_year  nacepcod2 ussicpcod   slegalf listed dateinc  conscode fias tfas cash toas ncli ltdb stok debt loan opre turn empl pl depr plat extr  roa) force
	}
	cd "${path_output}"
keep if conscode=="U1" | conscode=="U2" 
	drop if missing(dateinc)   
	tostring dateinc, gen(dateinc2)
	gen dateinc3=date(dateinc2,"YMD")
	replace dateinc3=date(dateinc2,"YM") if missing(dateinc3)
	replace dateinc3=date(dateinc2,"Y") if missing(dateinc3)
	gen dateinc_year=year(dateinc3)
	drop dateinc dateinc2 dateinc3
		
	duplicates drop bvid closdate_year, force
	egen bvid2=group(bvid)
	xtset bvid2 closdate_year, yearly
	gen sales= turn
	replace sales =opre if missing(turn)
	//correcting sic codes
	rename ussicpcod sic
	tostring sic, replace
	gen siclength=length(sic) // in the original file, shorter SIC codes miss first 0s
	replace sic = "0"+ sic if siclength==3
	replace sic = "00"+ sic if siclength==2

	
	// save only new firms 
	display "Process new firms"
	preserve  
	keep if dateinc_year>=2000
	bysort bvid2 (closdate_year): gen unobserved_years =(closdate_year[1]>dateinc_year)
	bysort bvid2 (closdate_year): gen is_first_obs =((_n==1) & unobserved_years==1)
	expand 2 if is_first_obs==1, gen(first_unobserved_year)  // creating a new observation for the (missing) inc_year of the firm
	replace closdate_year=dateinc_year if first_unobserved_year==1	//the new generated observation is the one for the incorporation year
	replace toas =. if first_unobserved_year==1  //we dont have financials for that unobserved year
	replace empl =. if first_unobserved_year==1
	replace opre =. if first_unobserved_year==1
	replace turn =. if first_unobserved_year==1
	replace sales =. if first_unobserved_year==1 

	display "tsfill 1"		// fill with obs between the inc_year and the last obs of the new firm
	tsfill
	gen new=1                                      //for identifying "new firms" (maybe I can drop it)
	bysort bvid2 (closdate_year): replace country=country[_N] if country==""
	bysort bvid2 (closdate_year): replace bvid =bvid[_N] if missing(bvid)
	bysort bvid2 (closdate_year): replace bvid2 =bvid2[_N] if missing(bvid2)
	bysort bvid2 (closdate_year): replace conscode =conscode[_N] if missing(conscode)
	bysort bvid2 (closdate_year): replace dateinc_year =dateinc_year[_N] if missing(dateinc_year)
	bysort bvid2 (closdate_year): replace listed =listed[_N] if missing(listed) 
	bysort bvid2 (closdate_year): replace delisted_date =delisted_date[_N] if missing(delisted_date)  //keep the delisted date
	bysort bvid2 (closdate_year): replace sic =sic[_N] if missing(sic)
	bysort bvid2 (closdate_year): replace nacepcod2 =nacepcod2[_N] if missing(nacepcod2)
	gen slegalf2=slegalf
	bysort bvid2 (closdate_year): replace slegalf2 = slegalf2[_n-1] if missing(slegalf) & _n>1  //keep the legal status of the first obs
	save newfirms.dta, replace 
	
	restore 

	//save dead firms
	display "Process dead firms"	
	preserve   
	keep if dateinc_year<2000
	bysort bvid2 (closdate_year): gen is_in_2014=(closdate_year[_N]==2014)
	keep if is_in_2014==0
	drop is_in_2014
	bysort bvid2 (closdate_year): gen unobserved_years =(closdate_year[1]>dateinc_year)
	bysort bvid2 (closdate_year): gen is_first_obs =((_n==1) & unobserved_years==1)
	expand 2 if is_first_obs==1, gen(first_unobserved_year)
	replace closdate_year=2002 if first_unobserved_year==1  /
	replace toas =. if first_unobserved_year==1
	replace empl =. if first_unobserved_year==1
	replace opre =. if first_unobserved_year==1
	replace turn =. if first_unobserved_year==1
	replace sales =. if first_unobserved_year==1	
	duplicates drop bvid closdate_year, force   
	display "tsfill 2"    //fills between 2000 and the last obs of the firm
	tsfill       
	gen dead=1
	bysort bvid2 (closdate_year): replace country=country[_N] if country==""
	bysort bvid2 (closdate_year): replace bvid =bvid[_N] if missing(bvid)
	bysort bvid2 (closdate_year): replace bvid2 =bvid2[_N] if missing(bvid2)	
	bysort bvid2 (closdate_year): replace conscode =conscode[_N] if missing(conscode)
	bysort bvid2 (closdate_year): replace dateinc_year =dateinc_year[_N] if missing(dateinc_year)
	bysort bvid2 (closdate_year): replace listed =listed[_N] if missing(listed) 
	bysort bvid2 (closdate_year): replace delisted_date =delisted_date[_N] if missing(delisted_date)
	bysort bvid2 (closdate_year): replace sic =sic[_N] if missing(sic)
	bysort bvid2 (closdate_year): replace nacepcod2 =nacepcod2[_N] if missing(nacepcod2)
	gen slegalf2=slegalf
	bysort bvid2 (closdate_year): replace slegalf2 = slegalf2[_n-1] if missing(slegalf) & _n>1
	save deadfirms.dta, replace 
	restore     
	
	// balancing panel for survived firms with inc_date<2000
	display "Process always alive firms"
	keep if dateinc_year<2000
	bysort bvid2 (closdate_year): gen is_in_2014=(closdate_year[_N]==2014)
	keep if is_in_2014==1
	drop is_in_2014
	bysort bvid2 (closdate_year): gen unobserved_years =(closdate_year[1]>dateinc_year)
	bysort bvid2 (closdate_year): gen is_first_obs =((_n==1) & unobserved_years==1)
	expand 2 if is_first_obs==1, gen(first_unobserved_year)
	replace closdate_year=2002 if first_unobserved_year==1  
	replace toas =. if first_unobserved_year==1
	replace empl =. if first_unobserved_year==1
	replace opre =. if first_unobserved_year==1
	replace turn =. if first_unobserved_year==1
	replace sales =. if first_unobserved_year==1	
	duplicates drop bvid closdate_year, force   
	display "tsfill 3" 
	tsfill, full 
	gen alwaysalive=1
	bysort bvid2 (closdate_year): replace country=country[_N] if country==""
	bysort bvid2 (closdate_year): replace bvid =bvid[_N] if missing(bvid)
	bysort bvid2 (closdate_year): replace bvid2 =bvid2[_N] if missing(bvid2)	
	bysort bvid2 (closdate_year): replace conscode =conscode[_N] if missing(conscode)
	bysort bvid2 (closdate_year): replace dateinc_year =dateinc_year[_N] if missing(dateinc_year)
	bysort bvid2 (closdate_year): replace listed =listed[_N] if missing(listed) 
	bysort bvid2 (closdate_year): replace delisted_date =delisted_date[_N] if missing(delisted_date)
	bysort bvid2 (closdate_year): replace sic =sic[_N] if missing(sic)
	bysort bvid2 (closdate_year): replace nacepcod2 =nacepcod2[_N] if missing(nacepcod2)
	gen slegalf2=slegalf
	bysort bvid2 (closdate_year): replace slegalf2 = slegalf2[_n-1] if missing(slegalf)  & _n>1 
	save alwaysalive.dta, replace 
	
	// append base + dead + new
	append using newfirms.dta, force
	append using  deadfirms.dta, force
	compress
	replace	 slegalf=slegalf2
	drop unobserved_years is_first_obs first_unobserved_year slegalf2
	
	//correcting for delisted firms (some of them where delisted)
	tostring delisted_date, generate(delisted_date2)
	gen delisted_date3=date(delisted_date2, "YMD")
	replace delisted_date3=date(delisted_date2,"YM") if missing(delisted_date3)
	replace delisted_date3=date(delisted_date2,"Y") if missing(delisted_date3)
	format delisted_date3 %td
	gen year_delisted=year(delisted_date3)
	replace listed="Listed" if (listed=="Delisted" & closdate_year<year_delisted)
	
	// merge with tresholds for full-disclosure	
	merge m:1 country closdate_year using  "/mnt/DATA/Dropbox/Research Papers/14 Cash and disclosure/UE Document/UE_threshold_extended.dta"
	drop if _merge==2
	drop _merge
	gen fulldisclosure=0 if slegalf=="Private limited companies"
	replace fulldisclosure=1 if slegalf=="Private limited companies" & (toas>=tr_toas & empl>= tr_empl) & !missing(toas) & !missing(empl)
	replace fulldisclosure=1 if fulldisclosure==0 & slegalf=="Private limited companies" & (toas>=tr_toas & sales>=tr_sales) & !missing(toas) & !missing(sales)
	replace fulldisclosure=1 if fulldisclosure==0 & slegalf=="Private limited companies" & (empl>= tr_empl & sales>=tr_sales) & !missing(empl) & !missing(sales)	

	gen private=(slegalf=="Private limited companies")
	gen public=(slegalf=="Public limited companies")
	gen other=(private==0 & public==0)
	gen toas_priv=private*toas if private==1
	gen toas_privfull=private*toas*fulldisclosure if private==1 & fulldisclosure==1
	gen toas_publ=public*toas if public==1
	gen sales_priv=private*sales if private==1
	gen sales_privfull=private*sales*fulldisclosure if private==1 & fulldisclosure==1
	gen sales_publ=public*sales if public==1
	
	gen listedd=(listed=="Listed")
	gen toas_listed=listedd*toas 
	gen sales_listed=listedd*sales 
	
	replace dead=0 if missing(dead)
	replace new=0 if missing(new)
	replace alwaysalive=0 if missing(alwaysalive)
	gen sic_3d=substr(sic,1,3)	
	gen sic_2d=substr(sic,1,2)	
	drop if missing(sic_2d) | sic_2d=="."
	drop if missing(sic_3d) | sic_3d=="."
		
	save 1_Code_sample_extended_`c'.dta, replace 
	display "SAVED `c'"
	cd "${path_input}"
	clear
}
cd "${path_output}"
erase alwaysalive.dta
erase deadfirms.dta
erase newfirms.dta
///---- Loops ending ----

clear
set more off
cd "${path_output}"

**# Generating country-industry aggregations
local country AT BE DE DK ES FI FR  GB IE IT NL SE 
	foreach c of local country {
		use 1_Code_sample_extended_`c'.dta,
		rename closdate_year year		
		preserve
		** collapse for counting	
		collapse (count)num_firms=bvid2   num_firms_discl=toas num_firmspriv_disc=toas_priv num_firmspriv_fulldisc=toas_privfull num_firmspubl_disc=toas_publ  ///
				(sum) num_firmspriv=private num_firmspubl=public num_firmsother=other num_firmslisted=listedd indtoas=toas toas_priv toas_privfull toas_publ toas_listed indsales=sales sales_priv sales_privfull sales_publ sales_listed ///
				, by( year country sic_3d )						
		save 1_`c'-3d_scope.dta, replace		
		restore
		** collapse for counting	
		collapse (count)num_firms=bvid2   num_firms_discl=toas num_firmspriv_disc=toas_priv num_firmspriv_fulldisc=toas_privfull num_firmspubl_disc=toas_publ ///
				(sum) num_firmspriv=private num_firmspubl=public num_firmsother=other num_firmslisted=listedd indtoas=toas toas_priv toas_privfull toas_publ toas_listed indsales=sales sales_priv sales_privfull sales_publ sales_listed ///
				, by( year country sic_2d )						
		save 1_`c'-2d_scope.dta, replace	
		clear
	}	

// appending country-industry-aggregation 3-DIGITS
clear
local country AT BE DE DK ES FI FR GB IE IT NL SE 
	foreach c of local country {
			append using 1_`c'-3d_scope.dta, force
	}
	egen country_sic_3d=concat(country sic_3d) 
	egen country_sic= group(country_sic_3d) 	
	xtset country_sic year, yearly
	drop country_sic_3d country_sic
// droping years & country withtout threshold data	
	drop if year<${start_year}
	drop if year>2012
	drop if missing(sic_3d)
	drop if sic_3d=="."

save 2_scope-3d_v4.dta,replace

// appending country-industry-aggregation 2-DIGITS
clear
local country AT BE DE DK ES FI FR GB IE IT NL SE 
	foreach c of local country {
			append using 1_`c'-2d_scope.dta, force
	}
	egen country_sic_2d=concat(country sic_2d) 
	egen country_sic= group(country_sic_2d) 	
	xtset country_sic year, yearly
	drop country_sic_2d country_sic
	// droping years & country withtout threshold data	
	drop if year<${start_year}
	drop if year>2012
	drop if missing(sic_2d)
	drop if sic_2d=="."
save 2_scope-2d_v4.dta,replace


**# Deleting temp files 
foreach c of local country {
	erase  1_`c'-3d_scope.dta
	erase 1_`c'-2d_scope.dta
}
